#!/usr/bin/python
# --coding:utf-8--



# 使用 lxml 的 etree 库
from lxml import etree

# NOTE: the last <li> in the snippet below ("fifth item") is deliberately
# missing its closing </li> tag, to demonstrate how lxml handles malformed
# markup.
text = '''
<div>
    <ul>
         <li class="item-0"><a href="https://www.dushu.com/lianzai/1115_1.html">first item</a></li>
         <li class="item-1"><a href="https://www.dushu.com/lianzai/1115_2.html">second item</a></li>
         <li class="item-inactive"><a href="https://www.dushu.com/lianzai/1115_3.html">third item</a></li>
         <li class="item-1"><a href="https://www.dushu.com/lianzai/1115_4.html">fourth item</a></li>
         <li class="item-0"><a href="https://www.dushu.com/lianzai/1115_5.html">fifth item</a>
     </ul>
 </div>
'''

# Parse the string into an HTML document tree with etree.HTML.
html = etree.HTML(text)

# Serialize the document tree back to markup. With default arguments
# etree.tostring returns a byte string, so `result` is kept as bytes.
result = etree.tostring(html)

# lxml repairs the markup automatically: besides closing the dangling <li>,
# it also wraps the fragment in <html> and <body> tags.
# Decode before printing so the markup is displayed as text; printing the
# bytes object directly on Python 3 would show a one-line b'...' repr with
# escaped newlines instead.
print(result.decode("utf-8"))

# To save the repaired document instead, write the bytes in binary mode:
# with open('case_demo.html', 'wb') as f:
#     f.write(result)
